from selenium import webdriver
import os
import time
import re
from bs4 import BeautifulSoup
import random


def makeAllDoctorAskHtmls(theUrl):
    """Render ``theUrl`` in headless Chrome, expand it, and save the HTML.

    The element with CSS class ``msg-more-link-text`` is clicked repeatedly
    until it can no longer be found or clicked. The resulting page source is
    then written to ``DoctorAskAllHtmls/<name>.html``, where ``<name>`` is the
    URL with ``/``, ``:``, ``?`` and ``.`` replaced by placeholder words so it
    is usable as a file name.

    Args:
        theUrl: Address of the page to capture, without a trailing newline.

    Raises:
        Exception: Failures while loading the page or writing the file
            propagate to the caller; the browser session is shut down first.
    """
    option = webdriver.ChromeOptions()
    option.add_argument('headless')
    # This switch tells Chrome to drop the traces that reveal it is being
    # driven by webdriver.
    option.add_argument("disable-blink-features=AutomationControlled")
    browser = webdriver.Chrome(r'C:\StudySoftware\Anaconda3\chromedriver.exe', options=option)

    try:
        browser.get(theUrl)

        try:
            while True:
                button = browser.find_element_by_class_name("msg-more-link-text")
                button.click()
                time.sleep(0.5)
        except Exception:
            # The link is gone or no longer clickable: nothing left to expand.
            # Catching Exception (not a bare except) keeps Ctrl-C working.
            pass

        # Give the last expansion a moment to render before taking the snapshot.
        time.sleep(0.5)

        fileName = theUrl
        for char, placeholder in (('/', '杠'), (':', "冒号"), ('?', "问号"), ('.', "点")):
            fileName = fileName.replace(char, placeholder)
        fileName += ".html"

        # Make sure the output directory exists so a fresh checkout does not
        # fail on every single page.
        os.makedirs("DoctorAskAllHtmls", exist_ok=True)
        with open("DoctorAskAllHtmls/%s" % fileName, "w", encoding="utf-8") as writerFile:
            writerFile.write(browser.page_source)

        browser.delete_all_cookies()
    finally:
        # Always end the session, even on errors, so no chromedriver/Chrome
        # processes are left behind. quit() also closes every open window.
        browser.quit()

    print("成功！")

def makeFileDirList():
    """Return the paths of the pages already saved under ``DoctorAskAllHtmls``.

    The directory tree is walked and every file name found is prefixed with
    ``DoctorAskAllHtmls/``. Sub-directory components are not included in the
    returned paths. A missing directory yields an empty list.
    """
    prefix = 'DoctorAskAllHtmls/'
    return [
        prefix + savedName
        for _root, _subDirs, savedNames in os.walk(r"DoctorAskAllHtmls")
        for savedName in savedNames
    ]

def giveUrl():
    """Fetch every listed URL whose page has not been saved yet.

    Reads ``OtherFiles/DoctorAskNextUsedUrlsSet.csv`` one URL per line. A URL
    whose target file already exists under ``DoctorAskAllHtmls`` is skipped.
    Every other URL is passed to ``makeAllDoctorAskHtmls``; if that raises,
    the URL is appended to ``OtherFiles/WrongDoctorAskAllHtmls.csv`` and
    processing continues with the next line. A random 1-3 second pause
    follows each fetch attempt.
    """
    # Built once as a set so each "already saved?" check is O(1).
    existing = set(makeFileDirList())

    with open("OtherFiles/DoctorAskNextUsedUrlsSet.csv", "r", encoding="utf-8") as readerFile:
        for number, line in enumerate(readerFile, start=1):
            # Remove only the line terminator. Slicing off the last character
            # unconditionally would truncate a final line that has no newline.
            theUrl = line.rstrip("\n")
            if not theUrl:
                # Blank line: nothing to fetch.
                continue

            fileName = theUrl
            for char, placeholder in (('/', '杠'), (':', "冒号"), ('?', "问号"), ('.', "点")):
                fileName = fileName.replace(char, placeholder)
            fileName += ".html"

            if 'DoctorAskAllHtmls/' + fileName in existing:
                print("%s已存在，跳过" % theUrl)
                continue

            print("正在爬取第%d个网站%s" % (number, theUrl))

            try:
                makeAllDoctorAskHtmls(theUrl)
            except Exception:
                # Record the failure and carry on. Exception (rather than a
                # bare except) lets Ctrl-C actually stop the crawl.
                print("出现错误！")
                with open("OtherFiles/WrongDoctorAskAllHtmls.csv", "a", encoding="utf-8") as writerFile:
                    writerFile.write(theUrl + "\n")

            # Throttle between requests. This pause previously sat outside the
            # loop and therefore ran only once, after all URLs were processed.
            time.sleep(random.uniform(1.0, 3.0))


if __name__ == "__main__":
    # Debug aid: fetch one page directly instead of walking the URL list.
    # makeAllDoctorAskHtmls("https://www.haodf.com/bingcheng/8819310520.html")
    giveUrl()